In [ ]:
import rasterio
from rasterio.mask import mask
import geopandas as gpd
import numpy as np
from shapely.geometry import mapping
import geopandas as gpd
In [ ]:
import os # we need os to do some basic file operations

sentinal_fp = "../RandomForest/data_a/sentinel/"
# find every file in the sentinal_fp directory
sentinal_band_paths = [os.path.join(sentinal_fp, f) for f in os.listdir(sentinal_fp) if os.path.isfile(os.path.join(sentinal_fp, f))]
sentinal_band_paths.sort()
sentinal_band_paths
Out[ ]:
['../RandomForest/data_a/sentinel/T32UPU_20210814T102031_B02_10m_extent.tif',
 '../RandomForest/data_a/sentinel/T32UPU_20210814T102031_B03_10m_extent.tif',
 '../RandomForest/data_a/sentinel/T32UPU_20210814T102031_B04_10m_extent.tif',
 '../RandomForest/data_a/sentinel/T32UPU_20210814T102031_B05_20m_extent.tif',
 '../RandomForest/data_a/sentinel/T32UPU_20210814T102031_B06_20m_extent.tif',
 '../RandomForest/data_a/sentinel/T32UPU_20210814T102031_B07_20m_extent.tif',
 '../RandomForest/data_a/sentinel/T32UPU_20210814T102031_B08_10m_extent.tif',
 '../RandomForest/data_a/sentinel/T32UPU_20210814T102031_B8A_20m_extent.tif']
In [ ]:
file_path = 'C:/Users/leoni/Documents/Uni/UGS/RandomForest/data_a/landcover/Data/DE033L1_AUGSBURG_UA2018_v013.gpkg'

# Load the dataset
data = gpd.read_file(file_path)
data
Out[ ]:
country fua_name fua_code code_2018 class_2018 prod_date identifier perimeter area comment Pop2018 geometry
0 DE Augsburg DE033L1 11100 Continuous urban fabric (S.L. : > 80%) 2020-09 425-DE033L1 155.888916 1185.135544 None 9 MULTIPOLYGON (((4387596.158 2801154.636, 43875...
1 DE Augsburg DE033L1 11100 Continuous urban fabric (S.L. : > 80%) 2020-09 785-DE033L1 204.742805 2617.844445 None 53 MULTIPOLYGON (((4385599.462 2805744.357, 43856...
2 DE Augsburg DE033L1 11100 Continuous urban fabric (S.L. : > 80%) 2020-09 751-DE033L1 397.305451 9451.982988 None 193 MULTIPOLYGON (((4389167.596 2805621.979, 43890...
3 DE Augsburg DE033L1 11210 Discontinuous dense urban fabric (S.L. : 50% -... 2020-09 5156-DE033L1 2683.734912 35662.775997 None 166 MULTIPOLYGON (((4394320.820 2808000.000, 43943...
4 DE Augsburg DE033L1 11100 Continuous urban fabric (S.L. : > 80%) 2020-09 1986-DE033L1 297.148595 4588.045376 None 60 MULTIPOLYGON (((4389395.144 2807242.564, 43893...
... ... ... ... ... ... ... ... ... ... ... ... ...
25378 DE Augsburg DE033L1 31000 Forests 2020-09 24837-DE033L1 682.883091 27090.306391 None 0 MULTIPOLYGON (((4411181.690 2815311.223, 44111...
25379 DE Augsburg DE033L1 32000 Herbaceous vegetation associations (natural gr... 2020-09 25099-DE033L1 485.354917 11090.189969 None 0 MULTIPOLYGON (((4397210.659 2824962.333, 43972...
25380 DE Augsburg DE033L1 32000 Herbaceous vegetation associations (natural gr... 2020-09 25134-DE033L1 528.446541 11505.819504 None 0 MULTIPOLYGON (((4397105.659 2832772.333, 43971...
25381 DE Augsburg DE033L1 50000 Water 2020-09 25235-DE033L1 568.167123 10591.167983 None 0 MULTIPOLYGON (((4388260.684 2807000.000, 43882...
25382 DE Augsburg DE033L1 23000 Pastures 2020-09 23246-DE033L1 763.153970 18744.576748 None 0 MULTIPOLYGON (((4391115.659 2828972.333, 43911...

25383 rows × 12 columns

In [ ]:
# create a products directory within the data dir which won't be uploaded to Github
img_dir = '../RandomForest/data_a/products2/'

# check to see if the dir it exists, if not, create it
if not os.path.exists(img_dir):
    os.makedirs(img_dir)

# filepath for image we're writing out
img_fp = img_dir + 'sentinel_bands.tif'

# Read metadata of first file and assume all other bands are the same
with rasterio.open(sentinal_band_paths[0]) as src0:
    meta = src0.meta

# Update metadata to reflect the number of layers
meta.update(count = len(sentinal_band_paths))

# # Read each layer and write it to stack
# with rasterio.open(img_fp, 'w', **meta) as dst:
#     for id, layer in enumerate(sentinal_band_paths, start=1):
#         with rasterio.open(layer) as src1:
#             dst.write_band(id, src1.read(1))
In [ ]:
full_dataset = rasterio.open(img_fp)
img_rows, img_cols = full_dataset.shape
img_bands = full_dataset.count
print(full_dataset.shape) # dimensions
print(full_dataset.count) # bands
(2197, 1478)
8
In [ ]:
path_composite = '../RandomForest/data_a/products/bands843_new.tif'
In [ ]:
import numpy as np
from rasterio.plot import show
import matplotlib.pyplot as plt

img_fp = path_composite

# Open the full extent of the raster dataset
with rasterio.open(img_fp) as dataset:
    # Read the specified bands (3, 2, 1 for RGB)
    img_data = dataset.read([3, 2, 1])
# Assuming you've already read the img_data
# Scale the data to 0-255 range, assuming it's 16-bit
img_data = np.clip(img_data / 4096 * 255, 0, 255).astype(np.uint8)

# Mask out no data values if necessary
no_data_value = -9999  # Replace with your actual no data value
img_data[img_data == no_data_value] = 0  # Set no data values to 0 for display

# Now display the image
fig, ax = plt.subplots(figsize=(10, 7))
show(img_data, ax=ax, transform=dataset.transform)

plt.show()
No description has been provided for this image
In [ ]:
full_dataset = rasterio.open(img_fp)
raster_crs = full_dataset.crs
In [ ]:
from rasterio.warp import calculate_default_transform, reproject, Resampling

# Define the source and destination coordinate reference systems
src_crs = 'EPSG:32632'
dst_crs = 'EPSG:4326'

# Open the original dataset
with rasterio.open('../RandomForest/data_a/products/bands843.tif') as src:
    # Calculate the transform and dimensions for the new dataset
    transform, width, height = calculate_default_transform(
        src.crs, dst_crs, src.width, src.height, *src.bounds)
    
    # Define the metadata for the new dataset
    kwargs = src.meta.copy()
    kwargs.update({
        'crs': dst_crs,
        'transform': transform,
        'width': width,
        'height': height
    })

    # Create the new dataset and reproject the original data into it
    with rasterio.open('../RandomForest/data_a/products/bands843_new.tif', 'w', **kwargs) as dst: ###change!!!!!!!!
        for i in range(1, src.count + 1):  # Loop through all bands
            reproject(
                source=rasterio.band(src, i),
                destination=rasterio.band(dst, i),
                src_transform=src.transform,
                src_crs=src.crs,
                dst_transform=transform,
                dst_crs=dst_crs,
                resampling=Resampling.nearest)
In [ ]:
shapefile = gpd.read_file('C:/Users/leoni/Documents/Uni/UGS/RandomForest/data_a/landcover/Data/DE033L1_AUGSBURG_UA2018_v013.gpkg')
shapefile.crs
Out[ ]:
<Projected CRS: EPSG:3035>
Name: ETRS89-extended / LAEA Europe
Axis Info [cartesian]:
- Y[north]: Northing (metre)
- X[east]: Easting (metre)
Area of Use:
- name: Europe - European Union (EU) countries and candidates. Europe - onshore and offshore: Albania; Andorra; Austria; Belgium; Bosnia and Herzegovina; Bulgaria; Croatia; Cyprus; Czechia; Denmark; Estonia; Faroe Islands; Finland; France; Germany; Gibraltar; Greece; Hungary; Iceland; Ireland; Italy; Kosovo; Latvia; Liechtenstein; Lithuania; Luxembourg; Malta; Monaco; Montenegro; Netherlands; North Macedonia; Norway including Svalbard and Jan Mayen; Poland; Portugal including Madeira and Azores; Romania; San Marino; Serbia; Slovakia; Slovenia; Spain including Canary Islands; Sweden; Switzerland; Türkiye (Turkey); United Kingdom (UK) including Channel Islands and Isle of Man; Vatican City State.
- bounds: (-35.58, 24.6, 44.83, 84.73)
Coordinate Operation:
- name: Europe Equal Area 2001
- method: Lambert Azimuthal Equal Area
Datum: European Terrestrial Reference System 1989 ensemble
- Ellipsoid: GRS 1980
- Prime Meridian: Greenwich
In [ ]:
shapefile = shapefile.to_crs(epsg=32632)
shapefile.crs
Out[ ]:
<Projected CRS: EPSG:32632>
Name: WGS 84 / UTM zone 32N
Axis Info [cartesian]:
- E[east]: Easting (metre)
- N[north]: Northing (metre)
Area of Use:
- name: Between 6°E and 12°E, northern hemisphere between equator and 84°N, onshore and offshore. Algeria. Austria. Cameroon. Denmark. Equatorial Guinea. France. Gabon. Germany. Italy. Libya. Liechtenstein. Monaco. Netherlands. Niger. Nigeria. Norway. Sao Tome and Principe. Svalbard. Sweden. Switzerland. Tunisia. Vatican City State.
- bounds: (6.0, 0.0, 12.0, 84.0)
Coordinate Operation:
- name: UTM zone 32N
- method: Transverse Mercator
Datum: World Geodetic System 1984 ensemble
- Ellipsoid: WGS 84
- Prime Meridian: Greenwich
In [ ]:
print(shapefile)
shapefile["class_2018"].unique()
      country  fua_name fua_code code_2018  \
0          DE  Augsburg  DE033L1     11100   
1          DE  Augsburg  DE033L1     11100   
2          DE  Augsburg  DE033L1     11100   
3          DE  Augsburg  DE033L1     11210   
4          DE  Augsburg  DE033L1     11100   
...       ...       ...      ...       ...   
25378      DE  Augsburg  DE033L1     31000   
25379      DE  Augsburg  DE033L1     32000   
25380      DE  Augsburg  DE033L1     32000   
25381      DE  Augsburg  DE033L1     50000   
25382      DE  Augsburg  DE033L1     23000   

                                              class_2018 prod_date  \
0                 Continuous urban fabric (S.L. : > 80%)   2020-09   
1                 Continuous urban fabric (S.L. : > 80%)   2020-09   
2                 Continuous urban fabric (S.L. : > 80%)   2020-09   
3      Discontinuous dense urban fabric (S.L. : 50% -...   2020-09   
4                 Continuous urban fabric (S.L. : > 80%)   2020-09   
...                                                  ...       ...   
25378                                            Forests   2020-09   
25379  Herbaceous vegetation associations (natural gr...   2020-09   
25380  Herbaceous vegetation associations (natural gr...   2020-09   
25381                                              Water   2020-09   
25382                                           Pastures   2020-09   

          identifier    perimeter          area comment  Pop2018  \
0        425-DE033L1   155.888916   1185.135544    None        9   
1        785-DE033L1   204.742805   2617.844445    None       53   
2        751-DE033L1   397.305451   9451.982988    None      193   
3       5156-DE033L1  2683.734912  35662.775997    None      166   
4       1986-DE033L1   297.148595   4588.045376    None       60   
...              ...          ...           ...     ...      ...   
25378  24837-DE033L1   682.883091  27090.306391    None        0   
25379  25099-DE033L1   485.354917  11090.189969    None        0   
25380  25134-DE033L1   528.446541  11505.819504    None        0   
25381  25235-DE033L1   568.167123  10591.167983    None        0   
25382  23246-DE033L1   763.153970  18744.576748    None        0   

                                                geometry  
0      MULTIPOLYGON (((640671.445 5353635.798, 640668...  
1      MULTIPOLYGON (((638616.901 5358201.245, 638618...  
2      MULTIPOLYGON (((642184.268 5358123.727, 642109...  
3      MULTIPOLYGON (((647303.556 5360566.887, 647300...  
4      MULTIPOLYGON (((642390.773 5359747.470, 642356...  
...                                                  ...  
25378  MULTIPOLYGON (((664060.510 5368090.340, 664059...  
25379  MULTIPOLYGON (((649972.813 5377568.573, 649975...  
25380  MULTIPOLYGON (((649766.914 5385378.476, 649771...  
25381  MULTIPOLYGON (((641260.177 5359490.598, 641256...  
25382  MULTIPOLYGON (((643829.528 5381502.382, 643827...  

[25383 rows x 12 columns]
Out[ ]:
array(['Continuous urban fabric (S.L. : > 80%)',
       'Discontinuous dense urban fabric (S.L. : 50% -  80%)', 'Pastures',
       'Arable land (annual crops)',
       'Discontinuous medium density urban fabric (S.L. : 30% - 50%)',
       'Industrial, commercial, public, military and private units',
       'Other roads and associated land',
       'Discontinuous low density urban fabric (S.L. : 10% - 30%)',
       'Isolated structures', 'Railways and associated land',
       'Mineral extraction and dump sites', 'Land without current use',
       'Green urban areas', 'Sports and leisure facilities', 'Forests',
       'Water',
       'Discontinuous very low density urban fabric (S.L. : < 10%)',
       'Herbaceous vegetation associations (natural grassland, moors...)',
       'Fast transit roads and associated land', 'Construction sites',
       'Airports'], dtype=object)
In [ ]:
shapefile.groupby('code_2018')['class_2018'].unique()
Out[ ]:
code_2018
11100             [Continuous urban fabric (S.L. : > 80%)]
11210    [Discontinuous dense urban fabric (S.L. : 50% ...
11220    [Discontinuous medium density urban fabric (S....
11230    [Discontinuous low density urban fabric (S.L. ...
11240    [Discontinuous very low density urban fabric (...
11300                                [Isolated structures]
12100    [Industrial, commercial, public, military and ...
12210             [Fast transit roads and associated land]
12220                    [Other roads and associated land]
12230                       [Railways and associated land]
12400                                           [Airports]
13100                  [Mineral extraction and dump sites]
13300                                 [Construction sites]
13400                           [Land without current use]
14100                                  [Green urban areas]
14200                      [Sports and leisure facilities]
21000                         [Arable land (annual crops)]
23000                                           [Pastures]
31000                                            [Forests]
32000    [Herbaceous vegetation associations (natural g...
50000                                              [Water]
Name: class_2018, dtype: object
In [ ]:
shapefile = shapefile.to_crs({'init': 'epsg:4326'})
shapefile.crs
c:\Users\leoni\Programme\Miniconda\envs\geo\Lib\site-packages\pyproj\crs\crs.py:141: FutureWarning: '+init=<authority>:<code>' syntax is deprecated. '<authority>:<code>' is the preferred initialization method. When making the change, be mindful of axis order changes: https://pyproj4.github.io/pyproj/stable/gotchas.html#axis-order-changes-in-proj-6
  in_crs_string = _prepare_from_proj_string(in_crs_string)
Out[ ]:
<Geographic 2D CRS: +init=epsg:4326 +type=crs>
Name: WGS 84
Axis Info [ellipsoidal]:
- lon[east]: Longitude (degree)
- lat[north]: Latitude (degree)
Area of Use:
- name: World.
- bounds: (-180.0, -90.0, 180.0, 90.0)
Datum: World Geodetic System 1984 ensemble
- Ellipsoid: WGS 84
- Prime Meridian: Greenwich
In [ ]:
len(shapefile)
Out[ ]:
25383
In [ ]:
# this generates a list of shapely geometries
geoms = shapefile.geometry.values 

# let's grab a single shapely geometry to check
geometry = geoms[0] 
print(type(geometry))
print(geometry)

# transform to GeoJSON format
from shapely.geometry import mapping
feature = [mapping(geometry)] # can also do this using polygon.__geo_interface__
print(type(feature))
print(feature)
<class 'shapely.geometry.multipolygon.MultiPolygon'>
MULTIPOLYGON (((10.897603435546376 48.32024993125257, 10.897558639391013 48.32006889000404, 10.896796603606004 48.32015034274349, 10.896843224431512 48.32033682294201, 10.897603435546376 48.32024993125257)))
<class 'list'>
[{'type': 'MultiPolygon', 'coordinates': [(((10.897603435546376, 48.32024993125257), (10.897558639391013, 48.32006889000404), (10.896796603606004, 48.32015034274349), (10.896843224431512, 48.32033682294201), (10.897603435546376, 48.32024993125257)),)]}]
In [ ]:
out_image, out_transform = mask(full_dataset, feature, crop=True)
out_image.shape
Out[ ]:
(3, 4, 9)
In [ ]:
full_dataset.close()
In [ ]:
import geopandas as gpd
import rasterio
from rasterio.mask import mask
from shapely.geometry import mapping
from shapely.geometry import box


# Load the shapefile
#shapefile = gpd.read_file(shapefile_path)

# Check if the CRS matches, if not, reproject
if shapefile.crs != raster_crs:
    shapefile = shapefile.to_crs(raster_crs)

# Check that geometries are valid
shapefile['valid'] = shapefile.is_valid
shapefile = shapefile[shapefile['valid']]


with rasterio.open(path_composite) as src:
    raster_bounds = src.bounds
    raster_bbox = box(*raster_bounds)  # Create a bounding box from the raster bounds

    # Only proceed if the raster and shapefile overlap
    if not shapefile.unary_union.intersects(raster_bbox):
        raise ValueError("Shapefile and raster do not overlap")
    else:
        print("The shapefile and raster overlap.")

    # Extract the raster values within the polygon
    for geom in shapefile.geometry:
        feature = [mapping(geom)]
        out_image, out_transform = mask(src, feature, crop=True)
        # process out_image
The shapefile and raster overlap.
---------------------------------------------------------------------------
WindowError                               Traceback (most recent call last)
File c:\Users\leoni\Programme\Miniconda\envs\geo\Lib\site-packages\rasterio\mask.py:80, in raster_geometry_mask(dataset, shapes, all_touched, invert, crop, pad, pad_width)
     79 try:
---> 80     window = geometry_window(dataset, shapes, pad_x=pad_x, pad_y=pad_y)
     82 except WindowError:
     83     # If shapes do not overlap raster, raise Exception or UserWarning
     84     # depending on value of crop

File c:\Users\leoni\Programme\Miniconda\envs\geo\Lib\site-packages\rasterio\features.py:477, in geometry_window(dataset, shapes, pad_x, pad_y, north_up, rotated, pixel_precision, boundless)
    476 if not boundless:
--> 477     window = window.intersection(raster_window)
    479 return window

File c:\Users\leoni\Programme\Miniconda\envs\geo\Lib\site-packages\rasterio\windows.py:775, in Window.intersection(self, other)
    763 """Return the intersection of this window and another
    764 
    765 Parameters
   (...)
    773 Window
    774 """
--> 775 return intersection([self, other])

File c:\Users\leoni\Programme\Miniconda\envs\geo\Lib\site-packages\rasterio\windows.py:125, in iter_args.<locals>.wrapper(*args, **kwargs)
    124 if len(args) == 1 and isinstance(args[0], Iterable):
--> 125     return function(*args[0])
    126 else:

File c:\Users\leoni\Programme\Miniconda\envs\geo\Lib\site-packages\rasterio\windows.py:239, in intersection(*windows)
    226 """Innermost extent of window intersections.
    227 
    228 Will raise WindowError if windows do not intersect.
   (...)
    237 Window
    238 """
--> 239 return functools.reduce(_intersection, windows)

File c:\Users\leoni\Programme\Miniconda\envs\geo\Lib\site-packages\rasterio\windows.py:257, in _intersection(w1, w2)
    256 else:
--> 257     raise WindowError(f"Intersection is empty {w1} {w2}")

WindowError: Intersection is empty Window(col_off=2179, row_off=735, width=20, height=43) Window(col_off=0, row_off=0, width=1945, height=1966)

During handling of the above exception, another exception occurred:

ValueError                                Traceback (most recent call last)
Cell In[78], line 33
     31 for geom in shapefile.geometry:
     32     feature = [mapping(geom)]
---> 33     out_image, out_transform = mask(src, feature, crop=True)
     34     # process out_image

File c:\Users\leoni\Programme\Miniconda\envs\geo\Lib\site-packages\rasterio\mask.py:178, in mask(dataset, shapes, all_touched, invert, nodata, filled, crop, pad, pad_width, indexes)
    175     else:
    176         nodata = 0
--> 178 shape_mask, transform, window = raster_geometry_mask(
    179     dataset, shapes, all_touched=all_touched, invert=invert, crop=crop,
    180     pad=pad, pad_width=pad_width)
    182 if indexes is None:
    183     out_shape = (dataset.count, ) + shape_mask.shape

File c:\Users\leoni\Programme\Miniconda\envs\geo\Lib\site-packages\rasterio\mask.py:86, in raster_geometry_mask(dataset, shapes, all_touched, invert, crop, pad, pad_width)
     82 except WindowError:
     83     # If shapes do not overlap raster, raise Exception or UserWarning
     84     # depending on value of crop
     85     if crop:
---> 86         raise ValueError('Input shapes do not overlap raster.')
     87     else:
     88         warnings.warn('shapes are outside bounds of raster. '
     89                       'Are they in different coordinate reference systems?')

ValueError: Input shapes do not overlap raster.

Adjust Landcover Data to match the scene¶

In [ ]:
# Reproject the shapefile to match the raster's CRS
shapefile = shapefile.to_crs(epsg=32632)
In [ ]:
with rasterio.open(path_composite) as src:
    print(src.bounds)
print(shapefile.total_bounds)
BoundingBox(left=10.761412564981446, bottom=48.25669556872475, right=10.963972462286222, top=48.46144248805698)
[ 610820.72860213 5327718.5402929   670858.48189483 5388978.22555986]
In [ ]:
shapefile = shapefile.to_crs("EPSG:4326")
print(shapefile.total_bounds)
[10.494824  48.0892135 11.3108095 48.6385815]
In [ ]:
import matplotlib.pyplot as plt
import rasterio
import rasterio.plot

fig, ax = plt.subplots(figsize=(10, 10))
with rasterio.open(path_composite) as src:
    rasterio.plot.show(src, ax=ax)
shapefile.plot(ax=ax, facecolor='none', edgecolor='red')
plt.show()
No description has been provided for this image
In [ ]:
from shapely.geometry import box
from shapely.affinity import scale

shapefile_path = 'C:/Users/leoni/Documents/Uni/UGS/RandomForest/data_a/landcover/Data/DE033L1_AUGSBURG_UA2018_v013.gpkg'

# Open the raster file and get its bounds
with rasterio.open(path_composite) as src:
    bounds = src.bounds

# Create a slightly smaller bounding box geometry from the raster bounds
# Reduce each side of the bounding box by a small fraction, e.g., 0.99 to reduce by 1%
smaller_bbox = scale(box(bounds.left, bounds.bottom, bounds.right, bounds.top), xfact=0.99, yfact=0.99, origin='center')

# Create a GeoDataFrame from the smaller bounding box
smaller_bbox_gdf = gpd.GeoDataFrame({'geometry': [smaller_bbox]}, crs=src.crs)

# Load the shapefile
shapefile = gpd.read_file(shapefile_path)

# Make sure the shapefile is in the same CRS as the raster
shapefile = shapefile.to_crs(src.crs)

# Perform a spatial join to find features completely within the smaller bounding box
within_shapefile = gpd.sjoin(shapefile, smaller_bbox_gdf, op='within')

# Save the clipped shapefile to a new file
within_shapefile.to_file('within_shapefile_smaller_bbox.gpkg', driver='GPKG')
c:\Users\leoni\Programme\Miniconda\envs\geo\Lib\site-packages\IPython\core\interactiveshell.py:3466: FutureWarning: The `op` parameter is deprecated and will be removed in a future release. Please use the `predicate` parameter instead.
  if await self.run_code(code, result, async_=asy):
In [ ]:
within_shapefile = within_shapefile.groupby('class_2018', group_keys=False).apply(lambda x: x.sample(n=30, replace=True))
within_shapefile
Out[ ]:
country fua_name fua_code code_2018 class_2018 prod_date identifier perimeter area comment Pop2018 geometry index_right
15079 DE Augsburg DE033L1 12400 Airports 2020-09 15062-DE033L1 5540.286317 1.073691e+06 None 0 MULTIPOLYGON (((10.92006 48.41900, 10.92004 48... 0
15079 DE Augsburg DE033L1 12400 Airports 2020-09 15062-DE033L1 5540.286317 1.073691e+06 None 0 MULTIPOLYGON (((10.92006 48.41900, 10.92004 48... 0
15079 DE Augsburg DE033L1 12400 Airports 2020-09 15062-DE033L1 5540.286317 1.073691e+06 None 0 MULTIPOLYGON (((10.92006 48.41900, 10.92004 48... 0
15079 DE Augsburg DE033L1 12400 Airports 2020-09 15062-DE033L1 5540.286317 1.073691e+06 None 0 MULTIPOLYGON (((10.92006 48.41900, 10.92004 48... 0
15079 DE Augsburg DE033L1 12400 Airports 2020-09 15062-DE033L1 5540.286317 1.073691e+06 None 0 MULTIPOLYGON (((10.92006 48.41900, 10.92004 48... 0
... ... ... ... ... ... ... ... ... ... ... ... ... ...
24675 DE Augsburg DE033L1 50000 Water 2020-09 25282-DE033L1 1453.911436 6.603690e+04 None 0 MULTIPOLYGON (((10.95771 48.41587, 10.95794 48... 0
24669 DE Augsburg DE033L1 50000 Water 2020-09 25269-DE033L1 225.641129 1.716125e+03 None 0 MULTIPOLYGON (((10.89601 48.38764, 10.89523 48... 0
24636 DE Augsburg DE033L1 50000 Water 2020-09 25224-DE033L1 2268.943692 9.009952e+04 None 0 MULTIPOLYGON (((10.93584 48.34512, 10.93607 48... 0
22733 DE Augsburg DE033L1 50000 Water 2020-09 25234-DE033L1 383.435266 4.950758e+03 None 0 MULTIPOLYGON (((10.87671 48.37269, 10.87678 48... 0
24653 DE Augsburg DE033L1 50000 Water 2020-09 25252-DE033L1 1597.411430 2.076930e+04 None 0 MULTIPOLYGON (((10.86536 48.34961, 10.86527 48... 0

630 rows × 13 columns

Only Greens¶

In [ ]:
# Dropping rows with "Airports" and "Railways and associated land"
within_shapefile = within_shapefile[~within_shapefile['class_2018'].isin(['Airports', 'Railways and associated land'])]

# Adding a new column 'green'
green_classes = [
    'Green urban areas',
    'Sports and leisure facilities',
    'Arable land (annual crops)',
    'Pastures',
    'Forests',
    'Herbaceous vegetation associations (natural grassland, moors...)',
    'Water'
]

within_shapefile['green'] = within_shapefile['class_2018'].apply(lambda x: 1 if x in green_classes else 0)
within_shapefile
c:\Users\leoni\Programme\Miniconda\envs\geo\Lib\site-packages\geopandas\geodataframe.py:1543: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)
Out[ ]:
country fua_name fua_code code_2018 class_2018 prod_date identifier perimeter area comment Pop2018 geometry index_right green
19905 DE Augsburg DE033L1 21000 Arable land (annual crops) 2020-09 20198-DE033L1 3060.749680 244835.995267 None 1 MULTIPOLYGON (((10.90228 48.42601, 10.90212 48... 0 1
17629 DE Augsburg DE033L1 21000 Arable land (annual crops) 2020-09 17792-DE033L1 1057.958047 44902.012673 None 24 MULTIPOLYGON (((10.86575 48.32851, 10.86555 48... 0 1
17576 DE Augsburg DE033L1 21000 Arable land (annual crops) 2020-09 17679-DE033L1 617.982528 17935.658317 None 5 MULTIPOLYGON (((10.78933 48.35210, 10.78925 48... 0 1
17156 DE Augsburg DE033L1 21000 Arable land (annual crops) 2020-09 17315-DE033L1 6651.721551 639202.864076 None 28 MULTIPOLYGON (((10.82250 48.27254, 10.82214 48... 0 1
18068 DE Augsburg DE033L1 21000 Arable land (annual crops) 2020-09 18175-DE033L1 1364.274144 45343.079105 None 52 MULTIPOLYGON (((10.84571 48.38072, 10.84570 48... 0 1
... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
24675 DE Augsburg DE033L1 50000 Water 2020-09 25282-DE033L1 1453.911436 66036.904891 None 0 MULTIPOLYGON (((10.95771 48.41587, 10.95794 48... 0 1
24669 DE Augsburg DE033L1 50000 Water 2020-09 25269-DE033L1 225.641129 1716.125084 None 0 MULTIPOLYGON (((10.89601 48.38764, 10.89523 48... 0 1
24636 DE Augsburg DE033L1 50000 Water 2020-09 25224-DE033L1 2268.943692 90099.522615 None 0 MULTIPOLYGON (((10.93584 48.34512, 10.93607 48... 0 1
22733 DE Augsburg DE033L1 50000 Water 2020-09 25234-DE033L1 383.435266 4950.758129 None 0 MULTIPOLYGON (((10.87671 48.37269, 10.87678 48... 0 1
24653 DE Augsburg DE033L1 50000 Water 2020-09 25252-DE033L1 1597.411430 20769.302667 None 0 MULTIPOLYGON (((10.86536 48.34961, 10.86527 48... 0 1

570 rows × 14 columns

In [ ]:
# within_shapefile.explore()
within_shapefile["green"].value_counts()
Out[ ]:
green
0    360
1    210
Name: count, dtype: int64
In [ ]:
import geopandas as gpd
import rasterio
from rasterio.mask import mask
from shapely.geometry import mapping
from shapely.geometry import box


# Load the shapefile
#shapefile = gpd.read_file(shapefile_path)
shapefile = within_shapefile

# Check if the CRS matches, if not, reproject
if shapefile.crs != raster_crs:
    shapefile = shapefile.to_crs(raster_crs)

# Check that geometries are valid
shapefile['valid'] = shapefile.is_valid
shapefile = shapefile[shapefile['valid']]


with rasterio.open(path_composite) as src:
    raster_bounds = src.bounds
    raster_bbox = box(*raster_bounds)  # Create a bounding box from the raster bounds

    # Only proceed if the raster and shapefile overlap
    if not shapefile.unary_union.intersects(raster_bbox):
        raise ValueError("Shapefile and raster do not overlap")
    else:
        print("The shapefile and raster overlap.")

    # Extract the raster values within the polygon
    for geom in shapefile.geometry:
        feature = [mapping(geom)]
        out_image, out_transform = mask(src, feature, crop=True)
        # process out_image
c:\Users\leoni\Programme\Miniconda\envs\geo\Lib\site-packages\geopandas\geodataframe.py:1543: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)
The shapefile and raster overlap.
In [ ]:
import numpy as np

# Initialize a list to hold the mean values for each band
band_means = []

# Extract the raster values within the polygon
for geom in shapefile.geometry:
    feature = [mapping(geom)]
    
    with rasterio.open(path_composite) as src:
        out_image, out_transform = mask(src, feature, crop=True)
        
        # Calculate mean of each band, excluding no-data values
        means = np.ma.array(out_image, mask=out_image == src.nodata).mean(axis=(1, 2))
        band_means.append(means.filled(src.nodata))
In [ ]:
with rasterio.open(path_composite) as src:
    raster_crs = src.crs
shapefile_crs = shapefile.crs

print("Raster CRS: ", raster_crs)
print("Shapefile CRS: ", shapefile_crs)
Raster CRS:  EPSG:4326
Shapefile CRS:  GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AXIS["Latitude",NORTH],AXIS["Longitude",EAST],AUTHORITY["EPSG","4326"]]
In [ ]:
with rasterio.open(path_composite) as src:
    raster_bounds = src.bounds

# Use the total_bounds attribute of the shapefile to get its bounding coordinates
shapefile_bounds = shapefile.total_bounds

print("Raster bounds: ", raster_bounds)
print("Shapefile bounds: ", shapefile_bounds)
Raster bounds:  BoundingBox(left=10.761412564981446, bottom=48.25669556872475, right=10.963972462286222, top=48.46144248805698)
Shapefile bounds:  [10.76761978 48.2578022  10.96264273 48.46011337]
In [ ]:
import matplotlib.pyplot as plt
import rasterio
import rasterio.plot

# Open the raster file
with rasterio.open(path_composite) as src:
    # Plot the raster
    fig, ax = plt.subplots()
    rasterio.plot.show(src, ax=ax)
    # Overlay the shapefile
    shapefile.plot(ax=ax, color='red', alpha=0.5)
    plt.show()
No description has been provided for this image
In [ ]:
import numpy as np
import rasterio
from rasterio.mask import mask
from shapely.geometry import mapping
import geopandas as gpd
from rasterio.features import geometry_mask

img_fp = path_composite

X = np.array([], dtype=np.float32).reshape(0, 3)  # Replace '8' with the actual number of bands if different
y = []  # Initialize y as an empty list

incomplete_features = []

with rasterio.open(img_fp) as src:
    raster_crs = src.crs
    nodata = src.nodatavals[0]  # Assuming all bands have the same nodata value
    band_count = src.count

    # Iterate over the geometries in the shapefile
    for index, row in shapefile.iterrows():
        geom = row['geometry']
        # Create a mask for the current geometry
        geom_mask = geometry_mask([geom], transform=src.transform, invert=True, out_shape=(src.height, src.width))

        # Check if the geometry intersects with the raster
        if not geom_mask.all():
            out_image, out_transform = mask(src, [geom], crop=True, nodata=nodata)
            out_image_masked = np.ma.masked_equal(out_image, nodata)
            valid_data = out_image_masked.compressed()

            # Ensure we have a complete set of pixels for all bands
            if valid_data.size % band_count == 0:
                out_image_reshaped = valid_data.reshape(-1, band_count)

                # Remove rows that contain nodata values
                valid_rows = np.all(out_image_reshaped != nodata, axis=1)
                out_image_reshaped = out_image_reshaped[valid_rows]

                # Only proceed if we have valid data left after removing nodata rows
                if out_image_reshaped.size > 0:
                    X = np.vstack((X, out_image_reshaped))
                    y.extend([row['green']] * out_image_reshaped.shape[0])
            else:
                # Collect indices of incomplete features to handle later
                incomplete_features.append(index)
                print(f"Feature {index} does not have a complete set of pixels.")

# Handle incomplete features as needed
# For example, you might want to drop them from the shapefile
shapefile = shapefile.drop(incomplete_features)
In [ ]:
#shapefile
In [ ]:
# Initialize empty arrays for the pixel values and labels
X = np.array([], dtype=np.float32).reshape(0, 3)  # Replace '8' with the number of bands
y = []

with rasterio.open(img_fp) as src:
    # Ensure the shapefile is in the same CRS as the raster
    shapefile = shapefile.to_crs(src.crs)
    nodata = src.nodatavals[0]  # Assuming all bands have the same nodata value

    # Loop through each feature in the shapefile
    for index, row in shapefile.iterrows():
        geom = row.geometry
        # Mask the raster with the geometry
        out_image, out_transform = mask(src, [geom], crop=True, nodata=nodata, filled=False)

        # Check if there is any valid data
        if np.any(out_image.mask == False):
            # Reshape and append to X and y
            valid_pixels = out_image.data[~out_image.mask].reshape(-1, src.count)
            X = np.vstack((X, valid_pixels))
            y.extend([row['green']] * valid_pixels.shape[0])
In [ ]:
out_image
Out[ ]:
masked_array(
  data=[[[--, --, --, ..., 1581.0, --, --],
         [--, --, --, ..., 721.0, 691.0, --],
         [--, --, --, ..., 918.0, 1743.0, --],
         ...,
         [--, --, 1554.0, ..., --, --, --],
         [--, 1769.0, 688.0, ..., --, --, --],
         [--, --, 1071.0, ..., --, --, --]],

        [[--, --, --, ..., 174.0, --, --],
         [--, --, --, ..., 152.0, 122.0, --],
         [--, --, --, ..., 179.0, 156.0, --],
         ...,
         [--, --, 417.0, ..., --, --, --],
         [--, 401.0, 236.0, ..., --, --, --],
         [--, --, 865.0, ..., --, --, --]],

        [[--, --, --, ..., 310.0, --, --],
         [--, --, --, ..., 222.0, 167.0, --],
         [--, --, --, ..., 251.0, 246.0, --],
         ...,
         [--, --, 473.0, ..., --, --, --],
         [--, 572.0, 345.0, ..., --, --, --],
         [--, --, 743.0, ..., --, --, --]]],
  mask=[[[ True,  True,  True, ..., False,  True,  True],
         [ True,  True,  True, ..., False, False,  True],
         [ True,  True,  True, ..., False, False,  True],
         ...,
         [ True,  True, False, ...,  True,  True,  True],
         [ True, False, False, ...,  True,  True,  True],
         [ True,  True, False, ...,  True,  True,  True]],

        [[ True,  True,  True, ..., False,  True,  True],
         [ True,  True,  True, ..., False, False,  True],
         [ True,  True,  True, ..., False, False,  True],
         ...,
         [ True,  True, False, ...,  True,  True,  True],
         [ True, False, False, ...,  True,  True,  True],
         [ True,  True, False, ...,  True,  True,  True]],

        [[ True,  True,  True, ..., False,  True,  True],
         [ True,  True,  True, ..., False, False,  True],
         [ True,  True,  True, ..., False, False,  True],
         ...,
         [ True,  True, False, ...,  True,  True,  True],
         [ True, False, False, ...,  True,  True,  True],
         [ True,  True, False, ...,  True,  True,  True]]],
  fill_value=-3.4e+38,
  dtype=float32)
In [ ]:
# Assuming 'shapefile' is a GeoDataFrame that has been properly read and is in the correct CRS
geoms = shapefile.geometry.values  # This will give us a numpy array of geometry objects

X = np.array([], dtype=np.float32).reshape(0, band_count)  # Adjust dtype and band_count as needed
y = []

with rasterio.open(img_fp) as src:
    for geom in geoms:  # Loop through each geometry in the array
        feature = [mapping(geom)]  # Convert to format expected by rasterio.mask.mask

        out_image, out_transform = mask(src, feature, crop=True)
        
        # Check for pixels that are not nodata (neither 0 nor 255 in all bands)
        if out_image.any():  # If there's any non-nodata pixel
            # Filter out the nodata pixels and reshape
            out_image_reshaped = out_image[:, (out_image[0] != nodata) & (out_image[0] != 0) & (out_image[0] != 255)].reshape(-1, band_count)
            
            # Now, we need to get the corresponding class for each geometry
            class_label = shapefile.loc[shapefile.geometry == geom, 'green'].values[0]
            
            # Extend the X and y arrays
            X = np.vstack((X, out_image_reshaped))  # Stack the pixels
            y.extend([class_label] * out_image_reshaped.shape[0])  # Extend the labels
In [ ]:
# What are our classification labels?
labels = np.unique(shapefile["green"])
print('The training data include {n} classes: {classes}\n'.format(n=labels.size, 
                                                                classes=labels))

# We will need a "X" matrix containing our features, and a "y" array containing our labels
print('Our X matrix is sized: {sz}'.format(sz=X.shape))
print('Our y array is sized: {sz}'.format(sz=np.array(y).shape))
The training data include 2 classes: [0 1]

Our X matrix is sized: (287200, 3)
Our y array is sized: (287200,)
In [ ]:
fig, ax = plt.subplots(figsize=[13, 6])

# Numbers 1-8 for bands
band_count = np.arange(1, 4)

# Convert y to numpy array for indexing
y_array = np.array(y)

# Iterate over unique classes
classes = np.unique(y_array)
for class_type in classes:
    # Calculate mean reflectance value for each band for the current class
    band_intensity = np.mean(X[y_array == class_type, :], axis=0)

    # Plot the mean reflectance values on the single plot
    ax.plot(band_count, band_intensity, label=class_type)

# Add axis labels
ax.set_xlabel('Band #')
ax.set_ylabel('Reflectance Value')

# Add a title
ax.set_title('Band Intensities Full Overview')

# Add legend outside of the plot
ax.legend(loc='upper left', bbox_to_anchor=(1, 1))

# Adjust layout to accommodate the legend
plt.tight_layout()

# Display the plot
plt.show()
No description has been provided for this image
In [ ]:
def str_class_to_int(class_array):
    class_array[class_array == 0] = 0
    class_array[class_array == 1] = 1
    return(class_array.astype(int))

GaussianNB Naive Bayes¶

In [ ]:
from sklearn.naive_bayes import GaussianNB

gnb = GaussianNB()
gnb.fit(X, y)
Out[ ]:
GaussianNB()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
GaussianNB()
In [ ]:
from rasterio.plot import show
from rasterio.plot import show_hist
from rasterio.windows import Window
from rasterio.plot import reshape_as_raster, reshape_as_image
In [ ]:
with rasterio.open(img_fp) as src:
    # may need to reduce this image size if your kernel crashes, takes a lot of memory
    img = src.read()[:, 400:950, 450:1500]

# Take our full image and reshape into long 2d array (nrow * ncol, nband) for classification
print(img.shape)
reshaped_img = reshape_as_image(img)
print(reshaped_img.shape)
(3, 550, 1050)
(550, 1050, 3)
In [ ]:
class_prediction = gnb.predict(reshaped_img.reshape(-1, 3))

# Reshape our classification map back into a 2D matrix so we can visualize it
class_prediction = class_prediction.reshape(reshaped_img[:, :, 0].shape)
In [ ]:
class_prediction = str_class_to_int(class_prediction)
In [ ]:
def color_stretch(image, index):
    colors = image[:, :, index].astype(np.float64)
    for b in range(colors.shape[2]):
        colors[:, :, b] = rasterio.plot.adjust_band(colors[:, :, b])
    return colors
    
# find the highest pixel value in the prediction image
n = int(np.max(class_prediction))

# next setup a colormap for our map
colors = dict((
    (0, (255, 215, 0, 255)),    # Airports - Gold
    (1, (34, 139, 34, 255)),    # Arable Land - Forest Green
))

# Put 0 - 255 as float 0 - 1
for k in colors:
    v = colors[k]
    _v = [_v / 255.0 for _v in v]
    colors[k] = _v
    
index_colors = [colors[key] if key in colors else 
                (255, 255, 255, 0) for key in range(0, n+1)]

cmap = plt.matplotlib.colors.ListedColormap(index_colors, 'Classification', n+1)
In [ ]:
import matplotlib.patches as mpatches
from matplotlib.colors import ListedColormap

# Define the class labels and their corresponding colors
class_labels = {
    0: "Not Green",
    1: "Green",
}

patches = [mpatches.Patch(color=colors[key], label=class_labels[key]) for key in class_labels]

fig, axs = plt.subplots(2,1,figsize=(20,15))

img_stretched = color_stretch(reshaped_img, [0, 1, 2])
axs[0].imshow(img_stretched)

axs[1].imshow(class_prediction, cmap=cmap, interpolation='none')
axs[1].legend(handles=patches, loc='upper left', bbox_to_anchor=(1.05, 1), borderaxespad=0.)

fig.show()
C:\Users\leoni\AppData\Local\Temp\ipykernel_17072\814845198.py:20: UserWarning: FigureCanvasAgg is non-interactive, and thus cannot be shown
  fig.show()
No description has been provided for this image
In [ ]:
# with rasterio.open(img_fp) as src:
#     green_band = src.read(3)
#     red_band = src.read(4)
#     nir_band = src.read(8)
    
# ndwi = (green_band.astype(float) - nir_band.astype(float)) / (green_band.astype(float) + nir_band.astype(float))
# ndvi = (nir_band.astype(float) - red_band.astype(float)) / (red_band.astype(float) + nir_band.astype(float))
In [ ]:
# ndwi = ndwi[150:600, 250:1400]
# ndvi = ndvi[150:600, 250:1400]
In [ ]:
# fig, axs = plt.subplots(2,2,figsize=(15,7))

# img_stretched = color_stretch(reshaped_img, [3, 2, 1])
# axs[0,0].imshow(img_stretched)

# axs[0,1].imshow(class_prediction, cmap=cmap, interpolation='none')

# nwdi_plot = axs[1,0].imshow(ndwi, cmap="RdYlGn")
# axs[1,0].set_title("NDWI")
# fig.colorbar(nwdi_plot, ax=axs[1,0])

# ndvi_plot = axs[1,1].imshow(ndvi, cmap="RdYlGn")
# axs[1,1].set_title("NDVI")
# fig.colorbar(ndvi_plot, ax=axs[1,1])

# plt.show()
No description has been provided for this image
In [ ]:
fig, axs = plt.subplots(1,2,figsize=(15,15))

img_stretched = color_stretch(reshaped_img, [0, 1, 2])
axs[0].imshow(img_stretched[0:180, 160:350])

axs[1].imshow(class_prediction[0:180, 160:350], cmap=cmap, interpolation='none')

fig.show()
C:\Users\leoni\AppData\Local\Temp\ipykernel_17072\2750166322.py:8: UserWarning: FigureCanvasAgg is non-interactive, and thus cannot be shown
  fig.show()
No description has been provided for this image

Random Forest¶

In [ ]:
from sklearn.ensemble import RandomForestClassifier
import numpy as np
import rasterio
import matplotlib.pyplot as plt

# Create the Random Forest model
rf = RandomForestClassifier()

# Train the model
rf.fit(X, y)
Out[ ]:
RandomForestClassifier()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
RandomForestClassifier()
In [ ]:
with rasterio.open(img_fp) as src:
    img = src.read()[:, 400:950, 450:1500]
reshaped_img = reshape_as_image(img)

# Predict the classes using the Random Forest model
class_prediction = rf.predict(reshaped_img.reshape(-1, 3))

# Reshape the classification map for visualization
class_prediction = str_class_to_int(class_prediction) 
In [ ]:
def color_stretch(image, index):
    colors = image[:, :, index].astype(np.float64)
    for b in range(colors.shape[2]):
        colors[:, :, b] = rasterio.plot.adjust_band(colors[:, :, b])
    return colors
    
# find the highest pixel value in the prediction image
n = int(np.max(class_prediction))

# next setup a colormap for our map
colors = dict((
    (0, (255, 215, 0, 255)),    # Not green - Gold
    (1, (34, 139, 34, 255)),    # Green Areas - Forest Green
))

# Put 0 - 255 as float 0 - 1
for k in colors:
    v = colors[k]
    _v = [_v / 255.0 for _v in v]
    colors[k] = _v
    
index_colors = [colors[key] if key in colors else 
                (255, 255, 255, 0) for key in range(0, n+1)]

cmap = plt.matplotlib.colors.ListedColormap(index_colors, 'Classification', n+1)
In [ ]:
import matplotlib.patches as mpatches
from matplotlib.colors import ListedColormap

# Assuming reshaped_img is in the correct format
expected_shape = reshaped_img[:, :, 0].shape
class_prediction_2d = class_prediction.reshape(expected_shape)

fig, axs = plt.subplots(2, 1, figsize=(20, 15))

img_stretched = color_stretch(reshaped_img, [0, 1, 2])
axs[0].imshow(img_stretched)

# Use the reshaped 2D array for visualization
axs[1].imshow(class_prediction_2d, cmap=cmap, interpolation='none')
axs[1].legend(handles=patches, loc='upper left', bbox_to_anchor=(1.05, 1), borderaxespad=0.)

plt.show()
No description has been provided for this image

KMeans¶

In [ ]:
from sklearn.cluster import KMeans

bands, rows, cols = img.shape

k = 2 # num of clusters

kmeans_predictions = KMeans(n_clusters=k, random_state=0).fit(reshaped_img.reshape(-1, 3))

kmeans_predictions_2d = kmeans_predictions.labels_.reshape(rows, cols)

# Now show the classmap next to the image
fig, axs = plt.subplots(1,2,figsize=(15,8))

img_stretched = color_stretch(reshaped_img, [0, 1, 2]) #try different band combinations
axs[0].imshow(img_stretched)

axs[1].imshow(kmeans_predictions_2d)
c:\Users\leoni\Programme\Miniconda\envs\geo\Lib\site-packages\sklearn\cluster\_kmeans.py:1416: FutureWarning: The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning
  super()._check_params_vs_input(X, default_n_init=10)
c:\Users\leoni\Programme\Miniconda\envs\geo\Lib\site-packages\joblib\externals\loky\backend\context.py:136: UserWarning: Could not find the number of physical cores for the following reason:
found 0 physical cores < 1
Returning the number of logical cores instead. You can silence this warning by setting LOKY_MAX_CPU_COUNT to the number of cores you want to use.
  warnings.warn(
  File "c:\Users\leoni\Programme\Miniconda\envs\geo\Lib\site-packages\joblib\externals\loky\backend\context.py", line 282, in _count_physical_cores
    raise ValueError(f"found {cpu_count_physical} physical cores < 1")
Out[ ]:
<matplotlib.image.AxesImage at 0x1ad2e4f45d0>
No description has been provided for this image

KNeighbours¶

In [ ]:
from sklearn.neighbors import KNeighborsClassifier

# Create the KNN model
knn = KNeighborsClassifier()

# Train the model
knn.fit(X, y)
Out[ ]:
KNeighborsClassifier()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
KNeighborsClassifier()
In [ ]:
with rasterio.open(img_fp) as src:
    img = src.read()[:, 400:950, 450:1500]
reshaped_img = reshape_as_image(img)

# Predict the classes using the KNN model
class_prediction = knn.predict(reshaped_img.reshape(-1, 3))

# Reshape the classification map for visualization
class_prediction = str_class_to_int(class_prediction)
In [ ]:
def color_stretch(image, index):
    colors = image[:, :, index].astype(np.float64)
    for b in range(colors.shape[2]):
        colors[:, :, b] = rasterio.plot.adjust_band(colors[:, :, b])
    return colors
    
# find the highest pixel value in the prediction image
n = int(np.max(class_prediction))

# next setup a colormap for our map
colors = dict((
    (0, (255, 215, 0, 255)),    # Airports - Gold
    (1, (34, 139, 34, 255)),    # Arable Land - Forest Green
))

# Put 0 - 255 as float 0 - 1
for k in colors:
    v = colors[k]
    _v = [_v / 255.0 for _v in v]
    colors[k] = _v
    
index_colors = [colors[key] if key in colors else 
                (255, 255, 255, 0) for key in range(0, n+1)]

cmap = plt.matplotlib.colors.ListedColormap(index_colors, 'Classification', n+1)
In [ ]:
fig, axs = plt.subplots(2, 1, figsize=(20, 15))

img_stretched = color_stretch(reshaped_img, [0, 1, 2])
axs[0].imshow(img_stretched)

class_prediction_2d = class_prediction.reshape(reshaped_img[:, :, 0].shape)
axs[1].imshow(class_prediction_2d, cmap=cmap, interpolation='none')
axs[1].legend(handles=patches, loc='upper left', bbox_to_anchor=(1.05, 1), borderaxespad=0.)

plt.show()
No description has been provided for this image

Decision Tree¶

In [ ]:
from sklearn.tree import DecisionTreeClassifier

# Create the Decision Tree model
dt = DecisionTreeClassifier()

# Train the model
dt.fit(X, y)
Out[ ]:
DecisionTreeClassifier()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
DecisionTreeClassifier()
In [ ]:
# Read and reshape the image
with rasterio.open(img_fp) as src:
    img = src.read()[:, 400:950, 450:1500]
reshaped_img = reshape_as_image(img)

# Predict the classes using the Decision Tree model
class_prediction = dt.predict(reshaped_img.reshape(-1, 3))

# Reshape the classification map for visualization
class_prediction = str_class_to_int(class_prediction)
In [ ]:
fig, axs = plt.subplots(2, 1, figsize=(20, 15))

img_stretched = color_stretch(reshaped_img, [0, 1, 2])
axs[0].imshow(img_stretched)

class_prediction_2d = class_prediction.reshape(reshaped_img[:, :, 0].shape)
axs[1].imshow(class_prediction_2d, cmap=cmap, interpolation='none')
axs[1].legend(handles=patches, loc='upper left', bbox_to_anchor=(1.05, 1), borderaxespad=0.)

plt.show()
No description has been provided for this image
In [ ]: